# first thing to do is importing the data
# install.packages("readxl")
readxl::read_excel("data_dec/water_stress.xlsx")
## # A tibble: 1,260 × 23
## Group Week Date Species PlantId Use Treat…¹ Soil_…² Elect…³
## <chr> <chr> <dttm> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 G1 W1 2022-10-05 00:00:00 Solanu… Slc1 cult… c 55.6 1.31
## 2 G1 W1 2022-10-05 00:00:00 Solanu… Slc2 cult… c 52.7 1.28
## 3 G1 W1 2022-10-05 00:00:00 Solanu… Slc3 cult… c 61.3 1.24
## 4 G1 W1 2022-10-05 00:00:00 Solanu… Slc4 cult… c 51 1.36
## 5 G1 W1 2022-10-05 00:00:00 Solanu… Slc5 cult… c 52.1 1.39
## 6 G1 W1 2022-10-05 00:00:00 Solanu… Slc6 cult… c 54.4 1.24
## 7 G1 W1 2022-10-05 00:00:00 Solanu… Slc7 cult… c 51.6 1.51
## 8 G1 W1 2022-10-05 00:00:00 Amaran… Arc1 wild c 54.6 1.68
## 9 G1 W1 2022-10-05 00:00:00 Amaran… Arc2 wild c 50.9 1.93
## 10 G1 W1 2022-10-05 00:00:00 Amaran… Arc3 wild c 67.2 1.89
## # … with 1,250 more rows, 14 more variables: Too_dry <chr>, Plant_height <dbl>,
## # Leaf_number <dbl>, Leaf_length <dbl>, Leaf_width <dbl>, Leaf_area <dbl>,
## # Chlorophyll_content <dbl>, Aerial_fresh_weight <dbl>,
## # Aerial_dry_weight <dbl>, Root_length <dbl>, Roots_fresh_weight <dbl>,
## # Roots_dry_weight <dbl>, Mortality <chr>, Comments <lgl>, and abbreviated
## # variable names ¹​Treatment, ²​Soil_humidity, ³​Electrical_conductivity
d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
after importing the data and give it the name of d0, we are going to visualize it, by creation different plots.
##Visualization of data with plots. Create many plots.
X= Date Y= Variable Y1= Plant height Y2= Leaf number Y3= Leaf lenght Y4= Leaf width Y5= Leaf area Y7= Chlorophyll
# install.packages("ggplot2", dependencies = TRUE)
library(ggplot2)
# first we are going to visualize all the species in a plot
ggplot(d0, aes(x= Date, y= Plant_height, group= PlantId, color= Treatment)) +
geom_line()+
facet_grid(Species ~.)
## Warning: Removed 3 rows containing missing values (`geom_line()`).
in this code chunk we are try to visualize one species, to know how to plot it first
# For Solanum lycopersicum
s1 <- d0[d0$Species=="Solanum lycopersicum",]
ggplot(s1, aes(x= Date, y= Plant_height, group= PlantId, color= Treatment)) +
geom_line()
then we will create a for loop to visualize all the species and all the variables
v1 <- c("Plant_height", "Leaf_width", "Leaf_length", "Leaf_area", "Leaf_number", "Root_length", "Chlorophyll_content", "Soil_humidity", "Electrical_conductivity")
i <- "Beta vulgaris"
variable <- "Plant_height"
for(i in levels(as.factor(d0$Species))) {
for(variable in v1) {
s1 <- d0[d0$Species==i, c(variable, "Week", "PlantId", "Treatment")]
s1 <- na.exclude(s1)
p <- ggplot(s1, aes(x= Week, y= .data[[variable]], group= PlantId, color= Treatment)) +
geom_line() +
labs(title = i)
print(p)
}
}
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## Chapter 2 - ANOVA
in this part we are trying to do an ANOVA analysis for the data
```r
#Packages
#install.packages(c("ggplot2", "ggpubr", "tidyverse", "broom", "AICcmodavg"), , dependencies = TRUE)
library(ggplot2)
library(ggpubr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ✔ purrr 0.3.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(broom)
#Import data
d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
d0
## # A tibble: 1,260 × 23
## Group Week Date Species PlantId Use Treat…¹ Soil_…² Elect…³
## <chr> <chr> <dttm> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 G1 W1 2022-10-05 00:00:00 Solanu… Slc1 cult… c 55.6 1.31
## 2 G1 W1 2022-10-05 00:00:00 Solanu… Slc2 cult… c 52.7 1.28
## 3 G1 W1 2022-10-05 00:00:00 Solanu… Slc3 cult… c 61.3 1.24
## 4 G1 W1 2022-10-05 00:00:00 Solanu… Slc4 cult… c 51 1.36
## 5 G1 W1 2022-10-05 00:00:00 Solanu… Slc5 cult… c 52.1 1.39
## 6 G1 W1 2022-10-05 00:00:00 Solanu… Slc6 cult… c 54.4 1.24
## 7 G1 W1 2022-10-05 00:00:00 Solanu… Slc7 cult… c 51.6 1.51
## 8 G1 W1 2022-10-05 00:00:00 Amaran… Arc1 wild c 54.6 1.68
## 9 G1 W1 2022-10-05 00:00:00 Amaran… Arc2 wild c 50.9 1.93
## 10 G1 W1 2022-10-05 00:00:00 Amaran… Arc3 wild c 67.2 1.89
## # … with 1,250 more rows, 14 more variables: Too_dry <chr>, Plant_height <dbl>,
## # Leaf_number <dbl>, Leaf_length <dbl>, Leaf_width <dbl>, Leaf_area <dbl>,
## # Chlorophyll_content <dbl>, Aerial_fresh_weight <dbl>,
## # Aerial_dry_weight <dbl>, Root_length <dbl>, Roots_fresh_weight <dbl>,
## # Roots_dry_weight <dbl>, Mortality <chr>, Comments <lgl>, and abbreviated
## # variable names ¹​Treatment, ²​Soil_humidity, ³​Electrical_conductivity
names(d0)
## [1] "Group" "Week"
## [3] "Date" "Species"
## [5] "PlantId" "Use"
## [7] "Treatment" "Soil_humidity"
## [9] "Electrical_conductivity" "Too_dry"
## [11] "Plant_height" "Leaf_number"
## [13] "Leaf_length" "Leaf_width"
## [15] "Leaf_area" "Chlorophyll_content"
## [17] "Aerial_fresh_weight" "Aerial_dry_weight"
## [19] "Root_length" "Roots_fresh_weight"
## [21] "Roots_dry_weight" "Mortality"
## [23] "Comments"
Most visual difference is in week 6 (w6)
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Plant_height") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Plant_height ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Plant_height
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 466 232.8 7.5815 0.000672 ***
## Species 8 59922 7490.3 243.9062 < 2.2e-16 ***
## Residuals 198 6081 30.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Plant_height ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.3858 -2.9645 -0.2497 2.4476 21.2022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.9645 1.0121 14.785 < 2e-16 ***
## Treatmenti -0.2671 0.9367 -0.285 0.775794
## Treatments -3.4121 0.9402 -3.629 0.000362 ***
## SpeciesBeta vulgaris 3.9026 1.5058 2.592 0.010261 *
## SpeciesHordeum vulgare 37.1571 1.4811 25.088 < 2e-16 ***
## SpeciesLolium perenne 26.6762 1.4811 18.011 < 2e-16 ***
## SpeciesPortulaca oleracea -7.0476 1.4811 -4.758 3.76e-06 ***
## SpeciesRaphanus sativus 6.0476 1.4811 4.083 6.44e-05 ***
## SpeciesSolanum lycopersicum 44.8333 1.4811 30.271 < 2e-16 ***
## SpeciesSonchus oleraceus 4.4619 1.4811 3.013 0.002928 **
## SpeciesSpinacia oleracea 0.9381 1.4811 0.633 0.527208
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.542 on 198 degrees of freedom
## Multiple R-squared: 0.9085, Adjusted R-squared: 0.9039
## F-statistic: 196.6 on 10 and 198 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y= Plant_height, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_number") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_number ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_number
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 13.74 6.871 4.1003 0.01799 *
## Species 8 618.29 77.286 46.1175 < 2e-16 ***
## Residuals 199 333.50 1.676
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_number, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_number") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_number ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_number
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 304.1 152.06 15.356 6.315e-07 ***
## Species 8 3971.2 496.40 50.128 < 2.2e-16 ***
## Residuals 198 1960.7 9.90
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_number, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_length
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 2.0 0.99 0.5525 0.5764
## Species 8 5750.9 718.87 402.4626 <2e-16 ***
## Residuals 199 355.4 1.79
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_length, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_length
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 79.8 39.9 6.1425 0.002581 **
## Species 8 30954.5 3869.3 595.6664 < 2.2e-16 ***
## Residuals 198 1286.2 6.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_length, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_width") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_width ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_width
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 3.62 1.809 5.1477 0.006612 **
## Species 8 704.03 88.004 250.4667 < 2.2e-16 ***
## Residuals 199 69.92 0.351
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_width, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_width") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_width ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_width
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 30.0 15.00 11.645 1.654e-05 ***
## Species 8 5014.3 626.78 486.695 < 2.2e-16 ***
## Residuals 198 255.0 1.29
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_width, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_area") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_area ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_area
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 128.4 64.21 4.3722 0.01386 *
## Species 8 11029.3 1378.66 93.8798 < 2e-16 ***
## Residuals 199 2922.4 14.69
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_area, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_area") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_area ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_area
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 7957 3978.3 29.755 5.025e-12 ***
## Species 8 153005 19125.7 143.048 < 2.2e-16 ***
## Residuals 198 26473 133.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_area, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W3" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 107.26 53.630 6.5679 0.002311 **
## Species 3 298.82 99.607 12.1985 1.258e-06 ***
## Residuals 78 636.91 8.166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
######
Week 4
d1 <- d0[d0$Week == "W4" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 15.56 7.780 0.8133 0.4459
## Species 5 533.87 106.774 11.1623 8.283e-09 ***
## Residuals 117 1119.17 9.566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
######
Week 5
d1 <- d0[d0$Week == "W5" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 23.87 11.94 0.8801 0.4168
## Species 6 2018.13 336.36 24.7988 <2e-16 ***
## Residuals 158 2143.01 13.56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
######
Week 6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 20.7 10.36 0.5908 0.5551
## Species 6 4832.8 805.47 45.9083 <2e-16 ***
## Residuals 158 2772.1 17.55
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Aerial_fresh_weight") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Aerial_fresh_weight ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Aerial_fresh_weight
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 1679.2 839.60 63.034 < 2.2e-16 ***
## Species 8 5582.4 697.80 52.389 < 2.2e-16 ***
## Residuals 199 2650.6 13.32
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Aerial_fresh_weight ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8460 -2.7587 -0.1937 2.4277 12.7065
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4314 0.6663 6.650 2.76e-10 ***
## Treatmenti -3.8427 0.6169 -6.229 2.74e-09 ***
## Treatments -6.9121 0.6169 -11.205 < 2e-16 ***
## SpeciesBeta vulgaris 10.7031 0.9754 10.973 < 2e-16 ***
## SpeciesHordeum vulgare 5.6631 0.9754 5.806 2.50e-08 ***
## SpeciesLolium perenne 1.0131 0.9754 1.039 0.300230
## SpeciesPortulaca oleracea 0.6617 0.9754 0.678 0.498335
## SpeciesRaphanus sativus 8.0121 0.9754 8.214 2.68e-14 ***
## SpeciesSolanum lycopersicum 15.2498 0.9754 15.634 < 2e-16 ***
## SpeciesSonchus oleraceus 10.9274 0.9754 11.203 < 2e-16 ***
## SpeciesSpinacia oleracea 3.5202 0.9754 3.609 0.000389 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.65 on 199 degrees of freedom
## Multiple R-squared: 0.7326, Adjusted R-squared: 0.7192
## F-statistic: 54.52 on 10 and 199 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Aerial_fresh_weight, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Aerial_dry_weight") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Aerial_dry_weight ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Aerial_dry_weight
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 3.332 1.6659 21.743 2.881e-09 ***
## Species 8 58.020 7.2525 94.658 < 2.2e-16 ***
## Residuals 199 15.247 0.0766
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Aerial_dry_weight ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8492 -0.1291 -0.0430 0.1474 1.3100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.199238 0.050537 3.942 0.000112 ***
## Treatmenti -0.090571 0.046788 -1.936 0.054310 .
## Treatments -0.300714 0.046788 -6.427 9.41e-10 ***
## SpeciesBeta vulgaris 0.663095 0.073978 8.963 2.30e-16 ***
## SpeciesHordeum vulgare 0.621667 0.073978 8.403 8.19e-15 ***
## SpeciesLolium perenne 0.088810 0.073978 1.200 0.231377
## SpeciesPortulaca oleracea 0.004048 0.073978 0.055 0.956421
## SpeciesRaphanus sativus 0.810238 0.073978 10.952 < 2e-16 ***
## SpeciesSolanum lycopersicum 1.473095 0.073978 19.913 < 2e-16 ***
## SpeciesSonchus oleraceus 1.300714 0.073978 17.582 < 2e-16 ***
## SpeciesSpinacia oleracea 0.148810 0.073978 2.012 0.045617 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2768 on 199 degrees of freedom
## Multiple R-squared: 0.801, Adjusted R-squared: 0.7909
## F-statistic: 80.08 on 10 and 199 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Aerial_dry_weight, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Root_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Root_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Root_length
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 38.4 19.19 0.5121 0.6
## Species 8 19277.3 2409.66 64.2873 <2e-16 ***
## Residuals 199 7459.1 37.48
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Root_length ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2830 -2.8891 -0.4475 1.9244 27.2872
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1226 1.1178 3.688 0.000291 ***
## Treatmenti -0.3986 1.0349 -0.385 0.700500
## Treatments 0.6394 1.0349 0.618 0.537392
## SpeciesBeta vulgaris 16.0534 1.6363 9.811 < 2e-16 ***
## SpeciesHordeum vulgare 20.5303 1.6363 12.547 < 2e-16 ***
## SpeciesLolium perenne 10.8008 1.6363 6.601 3.63e-10 ***
## SpeciesPortulaca oleracea 0.2543 1.6363 0.155 0.876635
## SpeciesRaphanus sativus 23.3591 1.6363 14.276 < 2e-16 ***
## SpeciesSolanum lycopersicum 20.8115 1.6363 12.719 < 2e-16 ***
## SpeciesSonchus oleraceus 23.0820 1.6363 14.107 < 2e-16 ***
## SpeciesSpinacia oleracea 3.5060 1.6363 2.143 0.033355 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.122 on 199 degrees of freedom
## Multiple R-squared: 0.7214, Adjusted R-squared: 0.7074
## F-statistic: 51.53 on 10 and 199 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Root_length, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
in this part we are going to do a PCA analysis for the data
#importing data
#we already imported the data in the previous parts, that's why the functions of importing the data are commented
#readxl::read_excel("data_dec/water_stress.xlsx")
#d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
Next we need to make a subset for the data with only numerical variables and exclude the other variables, scale the data and after that we will exclude all the missing data
PCA_data <- d0[c(8:9, 11:21)]
PCA_data_scaled <- as.data.frame(scale(PCA_data))
view(PCA_data_scaled)
# exclude missing values NA
PCA_data01 <- na.exclude(PCA_data_scaled)
now we are going to do a PCA of the data
PCA <- prcomp(PCA_data01, scale = FALSE)
print(PCA)
## Standard deviations (1, .., p=13):
## [1] 2.63773825 1.42968289 1.25675328 1.14973760 0.60657249 0.45294627
## [7] 0.26654708 0.23047149 0.21463797 0.18799706 0.16704258 0.12201297
## [13] 0.04922841
##
## Rotation (n x k) = (13 x 13):
## PC1 PC2 PC3 PC4
## Soil_humidity -0.004256694 0.049900003 -0.11766668 -0.248076044
## Electrical_conductivity -0.008013608 -0.005958900 -0.01650870 -0.037220428
## Plant_height -0.318400022 -0.019301974 -0.14618216 -0.384197630
## Leaf_number 0.192417874 -0.635956160 0.64567732 -0.355378847
## Leaf_length -0.205046605 -0.120327308 -0.25414660 -0.067997783
## Leaf_width -0.404071638 -0.113683193 -0.12490583 -0.280621897
## Leaf_area -0.461349314 -0.066980401 0.02731468 -0.114930278
## Chlorophyll_content -0.029133684 -0.720138948 -0.40752491 0.500642375
## Aerial_fresh_weight -0.354856181 -0.114183261 0.04683571 0.004752317
## Aerial_dry_weight -0.301730076 -0.060396919 0.04286553 -0.085152036
## Root_length -0.231654040 -0.001973725 0.10434426 0.102019675
## Roots_fresh_weight -0.363554181 0.150594043 0.49919198 0.523106846
## Roots_dry_weight -0.199261903 0.052958508 0.19101725 0.156922363
## PC5 PC6 PC7 PC8
## Soil_humidity 0.81632811 -0.28366669 -0.12976846 0.14824658
## Electrical_conductivity 0.36619435 -0.21981417 0.18023186 -0.29602259
## Plant_height -0.01315620 0.28915873 0.38028471 -0.35397318
## Leaf_number 0.04252025 0.05531393 0.04014367 0.04032970
## Leaf_length 0.06378036 0.23108937 0.19136667 0.21136522
## Leaf_width 0.04771203 0.30194646 0.01428868 0.04524855
## Leaf_area -0.10723770 -0.21713824 -0.09282013 0.32280363
## Chlorophyll_content 0.11999457 0.01285808 -0.01173686 -0.09367830
## Aerial_fresh_weight -0.26127660 -0.68904572 0.25688104 0.12860300
## Aerial_dry_weight -0.08327125 -0.11580426 -0.68050742 -0.57633112
## Root_length 0.08664442 0.28072338 -0.41475877 0.47650556
## Roots_fresh_weight 0.27203869 0.14253877 0.23696538 -0.12580896
## Roots_dry_weight 0.09259695 0.07305261 0.04017830 -0.11039856
## PC9 PC10 PC11 PC12
## Soil_humidity -0.263992614 0.254148826 -0.031390777 0.03297923
## Electrical_conductivity 0.539341880 -0.602942277 0.156682713 -0.12751678
## Plant_height -0.214741259 0.190067851 0.424838068 -0.32469782
## Leaf_number -0.006294311 -0.005400906 -0.065154717 -0.05398473
## Leaf_length -0.017141490 -0.185513523 -0.714750875 -0.43562912
## Leaf_width 0.043092731 -0.215567147 -0.083290061 0.76056601
## Leaf_area 0.618659582 0.440417654 0.079287497 -0.11915925
## Chlorophyll_content -0.013419756 0.118901682 0.156414002 0.03939421
## Aerial_fresh_weight -0.407301628 -0.256283608 0.020002787 0.03402097
## Aerial_dry_weight -0.079910332 -0.027390774 -0.225535675 -0.12276321
## Root_length -0.172190130 -0.404403108 0.418206586 -0.26483199
## Roots_fresh_weight -0.033349928 0.116738627 -0.122348454 0.05776325
## Roots_dry_weight -0.071779461 0.055883100 -0.006510435 -0.02497598
## PC13
## Soil_humidity -0.008525714
## Electrical_conductivity 0.021768187
## Plant_height -0.086121045
## Leaf_number -0.002813705
## Leaf_length 0.012070886
## Leaf_width 0.005325747
## Leaf_area 0.007473400
## Chlorophyll_content 0.004409018
## Aerial_fresh_weight -0.010657753
## Aerial_dry_weight -0.086931843
## Root_length -0.037649311
## Roots_fresh_weight -0.350667483
## Roots_dry_weight 0.927212736
now we will plot the PCA results
# Plotting the PCA results
# install.packages("factoextra")
#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/factoextra")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)
fviz_eig(PCA)
# graph for individuals
fviz_pca_ind(PCA,
col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)
## Warning: ggrepel: 32 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# graph of variable
fviz_pca_var(PCA,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)